import numpy as np
import numpy.linalg as la
import matplotlib.pyplot as plt
import scipy.sparse as sparse

# print(plt.style.available) # uncomment to print all styles
import seaborn as sns
# Notebook-wide plot styling: enlarged seaborn fonts, a white-grid style sheet,
# and a 10x10 inch default figure size.
sns.set(font_scale=2)
# Matplotlib 3.6 renamed its bundled seaborn style sheets to 'seaborn-v0_8-*'
# and later releases dropped the old names, so use whichever name this
# installation actually provides.
_whitegrid = 'seaborn-v0_8-whitegrid'
if _whitegrid not in plt.style.available:
    _whitegrid = 'seaborn-whitegrid'
plt.style.use(_whitegrid)
plt.rcParams['figure.figsize'] = (10, 10)
%matplotlib inline

Sparse

Create a Sparse Matrix in COO

# Nonzero entries of a 4x4 matrix, written as (row, column, value) triples.
entries = [
    (0, 1, 1.9),
    (0, 3, -5.2),
    (1, 0, 0.3),
    (1, 2, 9.1),
    (2, 0, 4.4),
    (2, 1, 5.8),
    (2, 2, 3.6),
    (3, 2, 7.2),
    (3, 3, 2.7),
]
# Split the triples into the three parallel lists that coo_matrix expects:
# row indices `i`, column indices `j`, and the values `data`.
i, j, data = map(list, zip(*entries))
A = sparse.coo_matrix((data, (i, j)))

# Show the stored entries first, then the equivalent dense matrix.
for view in (A, A.todense()):
    print(view)

  (0, 1)	1.9
  (0, 3)	-5.2
  (1, 0)	0.3
  (1, 2)	9.1
  (2, 0)	4.4
  (2, 1)	5.8
  (2, 2)	3.6
  (3, 2)	7.2
  (3, 3)	2.7
[[ 0.   1.9  0.  -5.2]
 [ 0.3  0.   9.1  0. ]
 [ 4.4  5.8  3.6  0. ]
 [ 0.   0.   7.2  2.7]]

# Same entries as the previous cell, but with the first two swapped: the
# triples handed to coo_matrix do not have to be sorted.
data = [-5.2, 1.9, 0.3, 9.1, 4.4, 5.8, 3.6, 7.2, 2.7]
i    = [   0,   0,   1,   1,   2,   2,   2,   3,   3]
j    = [   3,   1,   0,   2,   0,   1,   2,   2,   3]
A = sparse.coo_matrix((data, (i, j)))
print(A.todense())

# Dump the three parallel arrays held by the COO matrix (values, row indices,
# column indices), each followed by its dtype and length.
print(A.data)
print(A.data.dtype, 'Length: ', len(A.data))
print('-')
print(A.row)
print(A.row.dtype, 'Length: ', len(A.row))
print('-')
print(A.col)
# Report the length of `col` itself; this line previously measured A.row.
print(A.col.dtype, 'Length: ', len(A.col))

Convert to CSR

# Re-express the matrix in CSR format and show it both as stored entries and
# as a dense matrix.
A = A.tocsr()
for _view in (A, A.todense()):
    print(_view)

# Dump the three arrays held by the CSR matrix (data, indptr, indices), each
# followed by its dtype and length, with a '-' line between them.
for _position, _field in enumerate(('data', 'indptr', 'indices')):
    if _position > 0:
        print('-')
    _array = getattr(A, _field)
    print(_array)
    print(_array.dtype, 'Length: ', len(_array))

Try some timings: small, `Harvard500`

import scipy.io as sio
d = sio.loadmat('./Harvard500.mat')
A = d['Problem'][0][0][2].tocsr()

A

plt.figure(figsize=(10,10))
plt.spy(A, ms=5)

A.shape[0]

v = np.random.rand(A.shape[0])
w = np.random.rand(A.shape[0])

%timeit v = A * w

Adense = A.todense()

%timeit v = Adense.dot(w)

Medium `wb-cs-stanford`

d = sio.loadmat('./wb-cs-stanford.mat')
A = d['Problem'][0][0][2].tocsr()

plt.figure(figsize=(10,10))
plt.spy(A, ms=5)

A

v = np.random.rand(A.shape[0])
w = np.random.rand(A.shape[0])

%timeit v = A * w

Adense = A.todense()

%timeit v = Adense.dot(w)

Large `email-Enron`

d = sio.loadmat('./email-Enron.mat')
A = d['Problem'][0][0][2].tocsr()

plt.figure(figsize=(10,10))
plt.spy(A, ms=5)

A

v = np.random.rand(A.shape[0])
w = np.random.rand(A.shape[0])

%timeit v = A * w

Adense = A.todense()

%timeit v = Adense.dot(w)

Create a Sparse Matrix in COO

Convert to CSR

Try some timings: small, Harvard500

Medium wb-cs-stanford

Large email-Enron

Does 36692 sound "large"?

Try some timings: small, `Harvard500`

Medium `wb-cs-stanford`

Large `email-Enron`